/* File: drug_summary_stats.do
 * Author: Luca Maini
 * Purpose: calculates some summary stats about drugs
 * Output: Table OA1, Table 2
 *
 * Date Created: 01/13/2022
 *
 */

* open data (start from overlap measures)
use "${maindir}\combined_regression_dataset_augmented.dta", clear
preserve

* add line to Table B1 for number of on acquired drugs with nonmissing price 
* data
file open tableOA1_2 using "${paperdir}\Tables\Table OA1_2.csv", write replace
file write tableOA1_2 ",Individual deals, Drug-deal combinations" _n

unique Product if acqd == 1
local prods = r(sum)
local combo = r(N)

keep dealID*
gen test = _n
reshape long dealID, i(test) j(n)
keep dealID
drop if dealID == .
duplicates drop
count
file write tableOA1_2 "Excluding drugs fully missing price data,`r(N)',`prods', `combo'" _n

restore
preserve

* now add line for missing price data around the time of an event
bysort Product (year) : gen has_before = (acqd == 1 & w_log_net[_n-1] != .)
bysort Product (year) : gen has_after = (acqd == 1 & w_log_net[_n+1] != .)
gen has_both = has_before * has_after

unique Product if acqd == 1 & has_both == 1
local prods = r(sum)
local combo = r(N)

keep if acqd == 1 & has_both == 1

* remove dealIDs that do not refer to the acquisition in question
forvalues i = 1/4 {
	replace dealID`i' = . if yofd(dofq(acquisitionQrt`i')) != year
	}

keep dealID*
gen test = _n
reshape long dealID, i(test) j(n)
keep dealID
drop if dealID == .
duplicates drop
count

file write tableOA1_2 "Excluding drugs missing price data around time of event,`r(N)',`prods', `combo'" _n
file close tableOA1_2

restore

* identify acquired products
bysort Product (year) : egen acquired = max(acqd_count > 0)
bysort Product (year) : egen acquired_hor = max(same_atc3_acqd_count > 0 & stlth_acqd_count == 0)
bysort Product (year) : egen acquired_horSm = max(same_atc3_acqd_count > 0 & stlth_acqd_count > 0 & ///
												  same_atc3_acqd_count[_n-1] == same_atc3_acqd_count-1 & ///
												  stlth_acqd_count[_n-1] == stlth_acqd_count-1)
replace acquired_hor = 0 if acquired_horSm == 1 	// eliminate overlap
bysort Product (year) : egen acquired_by_lg = max(lg_acq_acqd > 0 & same_atc3_acqd_count == 0)

* generate variables to insert in the table
bysort Product (year) : gen WAC_growth = (WAC_price - WAC_price[_n-1])/WAC_price[_n-1]
bysort Product (year) : gen net_growth = (net_price - net_price[_n-1])/net_price[_n-1]

* Open table file, write header
file open table2 using "${paperdir}\Tables\Table 2.csv", write replace

file write table2 ",Non-acquired,,Acquired drugs,,,,," _n
file write table2 ",drugs,,Overall,,Horizontal Acquisitions,,Horizontal acquisitions below the HSR threshold,,Cross-market acquisitions by larger company," _n
file write table2 ",median,IQR,median,IQR,median,IQR,median,IQR,median,IQR," _n

* Loop through variables and lines
local lines `" `"Avg. yearly net sales (billion USD)"' `"Annual WAC growth"' `"Annual net price growth"' `"Fraction covered"' `"Fraction unrestricted"' `"Fraction preferred"' "'
local vars "net_sales WAC_growth net_growth frcov frunr frpre"

local varnum : word count `vars'

forvalues i = 1/`varnum' {
	
	local var : word `i' of `vars'
	local line : word `i' of `lines'
	
	* set display rules
	if "`var'" == "WAC_growth" | "`var'" == "net_growth" {
		qui sum `var' if acquired == 0, d
		local totMed : display %4.1f 100*r(p50)
		local totLB : display %4.1f 100*r(p25)
		local totUB : display %4.1f 100*r(p75)
		qui sum `var' if acquired == 1, d
		local acqMed : display %4.1f 100*r(p50)
		local acqLB : display %4.1f 100*r(p25)
		local acqUB : display %4.1f 100*r(p75)
		qui sum `var' if acquired_hor == 1, d
		local horMed : display %4.1f 100*r(p50)
		local horLB : display %4.1f 100*r(p25)
		local horUB : display %4.1f 100*r(p75)
		qui sum `var' if acquired_horSm == 1, d
		local horSmMed : display %4.1f 100*r(p50)
		local horSmLB : display %4.1f 100*r(p25)
		local horSmUB : display %4.1f 100*r(p75)
		qui sum `var' if acquired_by_lg == 1, d
		local lgMed : display %4.1f 100*r(p50)
		local lgLB : display %4.1f 100*r(p25)
		local lgUB : display %4.1f 100*r(p75)
		
			#delimit ;
		file write table2 "`line',
			`totMed'%,[`totLB'%; `totUB'%],
			`acqMed'%,[`acqLB'%; `acqUB'%],
			`horMed'%,[`horLB'%; `horUB'%],
			`horSmMed'%,[`horSmLB'%; `horSmUB'%],
			`lgMed'%,[`lgLB'%; `lgUB'%]" _n;
		#delimit cr
		}
	else {	
		qui sum `var' if acquired == 0, d
		local totMed : display %4.2f r(p50)
		local totLB : display %4.2f r(p25)
		local totUB : display %4.2f r(p75)
		qui sum `var' if acquired == 1, d
		local acqMed : display %4.2f r(p50)
		local acqLB : display %4.2f r(p25)
		local acqUB : display %4.2f r(p75)
		qui sum `var' if acquired_hor == 1, d
		local horMed : display %4.2f r(p50)
		local horLB : display %4.2f r(p25)
		local horUB : display %4.2f r(p75)
		qui sum `var' if acquired_horSm == 1, d
		local horSmMed : display %4.2f r(p50)
		local horSmLB : display %4.2f r(p25)
		local horSmUB : display %4.2f r(p75)
		qui sum `var' if acquired_by_lg == 1, d
		local lgMed : display %4.2f r(p50)
		local lgLB : display %4.2f r(p25)
		local lgUB : display %4.2f r(p75)

		#delimit ;
		file write table2 "`line',
			`totMed',[`totLB'; `totUB'],
			`acqMed',[`acqLB'; `acqUB'],
			`horMed',[`horLB'; `horUB'],
			`horSmMed',[`horSmLB'; `horSmUB'],
			`lgMed',[`lgLB'; `lgUB']" _n;
		#delimit cr
		}
	}

* Seventh line: sample sizes
unique Product if acquired == 0
local totN = r(unique)
unique Product if acquired == 1
local acqN = r(unique)
unique Product if acquired_hor == 1
local horN = r(unique)
unique Product if acquired_horSm == 1
local horSmN = r(unique)
unique Product if acquired_by_lg == 1
local lgN = r(unique)

file write table2 "Count,`totN',,`acqN',,`horN',,`horSmN',,`lgN'" _n

file close table2

* Additional stats cited in the paper
unique Product if acquired == 2
